bitkeeper revision 1.794.1.4 (40623aebq_XP4MvV6YJsXGleofDYNg)
author iap10@tetris.cl.cam.ac.uk <iap10@tetris.cl.cam.ac.uk>
Thu, 25 Mar 2004 01:50:35 +0000 (01:50 +0000)
committer iap10@tetris.cl.cam.ac.uk <iap10@tetris.cl.cam.ac.uk>
Thu, 25 Mar 2004 01:50:35 +0000 (01:50 +0000)
shadow mode improvements:
use hash table to avoid increasing pfn_info size.
improved locking in preparation for SMP guests.

12 files changed:
xen/arch/i386/process.c
xen/arch/i386/traps.c
xen/common/debug.c
xen/common/domain.c
xen/common/memory.c
xen/common/perfc.c
xen/common/shadow.c
xen/include/asm-i386/config.h
xen/include/asm-i386/processor.h
xen/include/xeno/mm.h
xen/include/xeno/shadow.h
xen/net/dev.c

index 2c5380108935c09676f6ccc7dedfc74b9f9d9766..f4b2ef4006a7168f6ace1e64c134a1e1eb8de3aa 100644 (file)
@@ -282,25 +282,14 @@ void switch_to(struct task_struct *prev_p, struct task_struct *next_p)
     }
 
     /* Switch page tables.  */
-#ifdef CONFIG_SHADOW
-
-    /*    printk("switch_to %08lx, %08lx\n", next_p->mm.pagetable,
-          next_p->mm.shadowtable);*/
-
-
-    if( next_p->mm.shadowmode )
+    if( next_p->mm.shadow_mode )
       {
-       check_pagetable( next_p->mm.pagetable, "switch" );
-       write_cr3_counted(pagetable_val(next_p->mm.shadowtable));
+       check_pagetable( next_p, next_p->mm.pagetable, "switch" );
+       write_cr3_counted(pagetable_val(next_p->mm.shadow_table));
       }
     else
-#endif
       write_cr3_counted(pagetable_val(next_p->mm.pagetable));
 
-
-
-
-
     set_current(next_p);
 
     /* Switch GDT and LDT. */
index eed1fb9faf330a5a3240dbc1e69d406c54476254..f35e0c898d22acf247fb92f11edcdc0ccd7fb039 100644 (file)
@@ -339,13 +339,11 @@ asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
             return; /* successfully copied the mapping */
     }
 
-#ifdef CONFIG_SHADOW
-    if ( p->mm.shadowmode && addr < PAGE_OFFSET &&
+    if ( unlikely( p->mm.shadow_mode ) && addr < PAGE_OFFSET &&
         shadow_fault( addr, error_code ) )
       {
        return; // return true if fault was handled 
       }
-#endif
 
     if ( unlikely(!(regs->xcs & 3)) )
         goto fault_in_hypervisor;
index 4e298bbfb5360ed0329ec9892bef221beff2e673..2956b0ea8cff90ac5ad3526b3fb852d597c05cb8 100644 (file)
@@ -91,11 +91,9 @@ int pdb_change_values(domid_t domain, u_char *buffer, unsigned long addr,
 
     if ((addr >> PAGE_SHIFT) == ((addr + length - 1) >> PAGE_SHIFT))
     {
-#ifdef CONFIG_SHADOW
-        if (p->mm.shadowmode )
-          l2_table = map_domain_mem(pagetable_val(p->mm.shadowtable));
+        if (p->mm.shadow_mode )
+          l2_table = map_domain_mem(pagetable_val(p->mm.shadow_table));
        else
-#endif
           l2_table = map_domain_mem(pagetable_val(p->mm.pagetable));
 
        l2_table += l2_table_offset(addr);
index c63c9164e3ce5875f64ea7e9b351125161f7ef9b..360677458d352710746e0faed668d7dfa32ba4d4 100644 (file)
@@ -341,12 +341,14 @@ void free_domain_page(struct pfn_info *page)
         if ( !(page->count_and_flags & PGC_zombie) )
         {
             page->tlbflush_timestamp = tlbflush_clock;
-            page->u.cpu_mask = 1 << p->processor;
-
-            spin_lock(&p->page_list_lock);
-            list_del(&page->list);
-            p->tot_pages--;
-            spin_unlock(&p->page_list_lock);
+           if (p)
+           {
+                page->u.cpu_mask = 1 << p->processor;
+                spin_lock(&p->page_list_lock);
+               list_del(&page->list);
+               p->tot_pages--;
+               spin_unlock(&p->page_list_lock);
+           }
         }
 
         page->count_and_flags = 0;
@@ -547,10 +549,6 @@ int final_setup_guestos(struct task_struct *p, dom0_builddomain_t *builddomain)
     get_page_and_type(&frame_table[phys_l2tab>>PAGE_SHIFT], p, 
                       PGT_l2_page_table);
 
-#ifdef CONFIG_SHADOW
-    p->mm.shadowtable = shadow_mk_pagetable(phys_l2tab, p->mm.shadowmode);
-#endif
-
     /* Set up the shared info structure. */
     update_dom_time(p->shared_info);
 
@@ -852,15 +850,10 @@ int setup_guestos(struct task_struct *p, dom0_createdomain_t *params,
 
     set_bit(PF_CONSTRUCTED, &p->flags);
 
-#ifdef CONFIG_SHADOW
-
-printk("Engage shadow mode for dom 0\n");
-    p->mm.shadowmode = SHM_test; // XXXXX IAP
-    p->mm.shadowtable = shadow_mk_pagetable(phys_l2tab, p->mm.shadowmode );
+#if 1 // XXXXX IAP DO NOT CHECK IN ENBALED !!!!!!!
+    shadow_mode_enable(p, SHM_test); 
 #endif
 
-
-
     new_thread(p, 
                (unsigned long)virt_load_address, 
                (unsigned long)virt_stack_address, 
index f6e8155f71cd3c4c0193c4314b864f8eb16249f4..1c547130060dc28f9466f3cefd2a5bc73b4d9fea 100644 (file)
@@ -765,20 +765,22 @@ void free_page_type(struct pfn_info *page, unsigned int type)
     {
     case PGT_l1_page_table:
         free_l1_table(page);
-#ifdef CONFIG_SHADOW
-       // assume we're in shadow mode if PSH_shadowed set
-       if ( current->mm.shadowmode && page->shadow_and_flags & PSH_shadowed )
+       if ( unlikely(current->mm.shadow_mode) && 
+            (get_shadow_status(current, page-frame_table) & PSH_shadowed) )
+       {
            unshadow_table( page-frame_table, type );
-#endif
+           put_shadow_status(current);
+        }
        return;
 
     case PGT_l2_page_table:
         free_l2_table(page);
-#ifdef CONFIG_SHADOW
-       // assume we're in shadow mode if PSH_shadowed set
-       if ( current->mm.shadowmode && page->shadow_and_flags & PSH_shadowed )
+       if ( unlikely(current->mm.shadow_mode) && 
+            (get_shadow_status(current, page-frame_table) & PSH_shadowed) )
+       {
            unshadow_table( page-frame_table, type );
-#endif
+           put_shadow_status(current);
+        }
        return;
 
     default:
@@ -848,21 +850,22 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
             put_page_and_type(&frame_table[pagetable_val(current->mm.pagetable)
                                           >> PAGE_SHIFT]);
             current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
-#ifdef CONFIG_SHADOW            
-           current->mm.shadowtable = 
-             shadow_mk_pagetable(pfn << PAGE_SHIFT, current->mm.shadowmode);
-#endif
-            invalidate_shadow_ldt();
 
+           if( unlikely(current->mm.shadow_mode))
+             current->mm.shadow_table = 
+               shadow_mk_pagetable(current, pfn<<PAGE_SHIFT);
+
+            invalidate_shadow_ldt();
+           
+           // start using the new PT straight away
             percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
-#ifdef CONFIG_SHADOW
-            if ( unlikely(current->mm.shadowmode) )
+            if ( unlikely(current->mm.shadow_mode) )
            {
-                check_pagetable( current->mm.pagetable, "pre-stlb-flush" );
-               write_cr3_counted(pagetable_val(current->mm.shadowtable));
+                check_pagetable( current, 
+                                current->mm.pagetable, "pre-stlb-flush" );
+               write_cr3_counted(pagetable_val(current->mm.shadow_table));
             }
             else
-#endif   
                write_cr3_counted(pagetable_val(current->mm.pagetable));
         }
         else
@@ -947,10 +950,8 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
     struct pfn_info *page;
     int rc = 0, okay = 1, i, cpu = smp_processor_id();
     unsigned int cmd;
-#ifdef CONFIG_SHADOW
     unsigned long prev_spfn = 0;
     l1_pgentry_t *prev_spl1e = 0;
-#endif
 
     perfc_incrc(calls_to_mmu_update); 
     perfc_addc(num_page_updates, count);
@@ -1002,11 +1003,14 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
                     okay = mod_l1_entry((l1_pgentry_t *)va, 
                                         mk_l1_pgentry(req.val)); 
 
-#ifdef CONFIG_SHADOW
-                   if ( okay && page->shadow_and_flags & PSH_shadowed )
+                   if ( okay && unlikely(current->mm.shadow_mode) &&
+                        (get_shadow_status(current, page-frame_table) &
+                         PSH_shadowed) )
+                   {
                        shadow_l1_normal_pt_update( req.ptr, req.val, 
                                                    &prev_spfn, &prev_spl1e );
-#endif
+                       put_shadow_status(current);
+                   }
 
                     put_page_type(page);
                 }
@@ -1017,10 +1021,14 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
                     okay = mod_l2_entry((l2_pgentry_t *)va, 
                                         mk_l2_pgentry(req.val),
                                         pfn); 
-#ifdef CONFIG_SHADOW
-                   if ( okay && page->shadow_and_flags & PSH_shadowed )
+
+                   if ( okay && unlikely(current->mm.shadow_mode) &&
+                        (get_shadow_status(current, page-frame_table) & 
+                         PSH_shadowed) )
+                   {
                        shadow_l2_normal_pt_update( req.ptr, req.val );
-#endif
+                       put_shadow_status(current);
+                   }
 
                     put_page_type(page);
                 }
@@ -1032,19 +1040,11 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
                     okay = 1;
                     put_page_type(page);
 
-#ifdef CONFIG_SHADOW
-                   if ( page->shadow_and_flags & PSH_shadowed )
-                       BUG(); 
-                       // at present, we shouldn't be shadowing such pages
-#endif
-
-
+                    // at present, we don't shadowing such pages
                 }
                 break;
             }
 
-check_pagetable( current->mm.pagetable, "mmu" ); // XXX XXX XXX XXX XXX
-            
             put_page(page);
 
             break;
@@ -1087,25 +1087,22 @@ check_pagetable( current->mm.pagetable, "mmu" ); // XXX XXX XXX XXX XXX
     if ( prev_pfn != 0 )
         unmap_domain_mem((void *)va);
 
-#ifdef CONFIG_SHADOW
     if( prev_spl1e != 0 ) 
         unmap_domain_mem((void *)prev_spl1e);
-#endif
 
     deferred_ops = percpu_info[cpu].deferred_ops;
     percpu_info[cpu].deferred_ops = 0;
 
     if ( deferred_ops & DOP_FLUSH_TLB )
     {
-#ifdef CONFIG_SHADOW
-        if ( unlikely(current->mm.shadowmode) )
+        if ( unlikely(current->mm.shadow_mode) )
        {
-            check_pagetable( current->mm.pagetable, "pre-stlb-flush" );
-           write_cr3_counted(pagetable_val(current->mm.shadowtable));
+            check_pagetable( current, 
+                            current->mm.pagetable, "pre-stlb-flush" );
+           write_cr3_counted(pagetable_val(current->mm.shadow_table));
         }
         else
-#endif   
-         write_cr3_counted(pagetable_val(current->mm.pagetable));
+           write_cr3_counted(pagetable_val(current->mm.pagetable));
     }
 
     if ( deferred_ops & DOP_RELOAD_LDT )
@@ -1142,9 +1139,7 @@ int do_update_va_mapping(unsigned long page_nr,
                                 mk_l1_pgentry(val))) )
         err = -EINVAL;
 
-#ifdef CONFIG_SHADOW
-
-    if ( unlikely(p->mm.shadowmode) )
+    if ( unlikely(p->mm.shadow_mode) )
     {
         unsigned long sval = 0;
 
@@ -1164,14 +1159,14 @@ int do_update_va_mapping(unsigned long page_nr,
        {
            // Since L2's are guranteed RW, failure indicates the page
            // was not shadowed, so ignore.
-            
+            perfc_incrc(shadow_update_va_fail);
            //MEM_LOG("update_va_map: couldn't write update\n");        
        }
-    }
 
-check_pagetable( p->mm.pagetable, "va" );
+       check_pagetable( p, p->mm.pagetable, "va" ); // debug
+    
+    }
 
-#endif
 
     deferred_ops = percpu_info[cpu].deferred_ops;
     percpu_info[cpu].deferred_ops = 0;
@@ -1179,12 +1174,10 @@ check_pagetable( p->mm.pagetable, "va" );
     if ( unlikely(deferred_ops & DOP_FLUSH_TLB) || 
          unlikely(flags & UVMF_FLUSH_TLB) )
     {
-#ifdef CONFIG_SHADOW
-        if ( unlikely(p->mm.shadowmode) )
-          write_cr3_counted(pagetable_val(p->mm.shadowtable));
+        if ( unlikely(p->mm.shadow_mode) )
+            write_cr3_counted(pagetable_val(p->mm.shadow_table));
         else
-#endif
-          write_cr3_counted(pagetable_val(p->mm.pagetable));
+            write_cr3_counted(pagetable_val(p->mm.pagetable));
     }
     else if ( unlikely(flags & UVMF_INVLPG) )
         __flush_tlb_one(page_nr << PAGE_SHIFT);
index af9abbb67c7bc6a61d78060ebe14a63d670a93c7..1d24c8fd0425ea7f96cb99a9bff33d98299e2a9a 100644 (file)
@@ -103,7 +103,7 @@ void perfc_reset(u_char key, void *dev_id, struct pt_regs *regs)
             for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ )
                atomic_set(&counters[j],0);
         case TYPE_S_ARRAY:
-            counters += j;
+            counters += perfc_info[i].nr_elements;
             break;
         }
     }
index a0df57d8f116284b4987c27e7bc9888de98b1a07..c1e25f5a52b5209040e9ac2c4e14a6a2dca53ffd 100644 (file)
 #include <asm/domain_page.h>
 #include <asm/page.h>
 
-#ifdef CONFIG_SHADOW
-
-
-#if SHADOW_DEBUG
-#define MEM_VLOG(_f, _a...)                             \
-  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
-         current->domain , __LINE__ , ## _a )
-#else
-#define MEM_VLOG(_f, _a...) 
-#endif
-
-#if 0
-#define MEM_VVLOG(_f, _a...)                             \
-  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
-         current->domain , __LINE__ , ## _a )
-#else
-#define MEM_VVLOG(_f, _a...) 
-#endif
-
 
 /********
 
 To use these shadow page tables, guests must not rely on the ACCESSED
 and DIRTY bits on L2 pte's being accurate -- they will typically all be set.
 
+
 I doubt this will break anything. (If guests want to use the va_update
 mechanism they've signed up for this anyhow...)
 
 ********/
 
 
-pagetable_t shadow_mk_pagetable( unsigned long gptbase, 
-                                       unsigned int shadowmode )
+int shadow_mode_enable( struct task_struct *p, unsigned int mode )
 {
-       unsigned long gpfn, spfn=0;
+       struct shadow_status **fptr;
+       int i;
+
+       // sychronously stop domain
+    // XXX for the moment, only use on already stopped domains!!!
+
+       spin_lock_init(&p->mm.shadow_lock);
+       spin_lock(&p->mm.shadow_lock);
+
+    p->mm.shadow_mode = mode;
+       
+       // allocate hashtable
+    p->mm.shadow_ht = kmalloc( shadow_ht_buckets * 
+                                                          sizeof(struct shadow_status), GFP_KERNEL );
+       if( ! p->mm.shadow_ht )
+               goto nomem;
 
-       MEM_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
-                        gptbase, shadowmode );
+       memset( p->mm.shadow_ht, 0, shadow_ht_buckets * 
+                                                          sizeof(struct shadow_status) );
 
-       if ( unlikely(shadowmode) ) 
+
+       // allocate space for first lot of extra nodes
+    p->mm.shadow_ht_extras = kmalloc( sizeof(void*) + (shadow_ht_extra_size * 
+                                                          sizeof(struct shadow_status)), GFP_KERNEL );
+
+       if( ! p->mm.shadow_ht_extras )
+               goto nomem;
+
+       memset( p->mm.shadow_ht_extras, 0, sizeof(void*) + (shadow_ht_extra_size * 
+                                                          sizeof(struct shadow_status)) );
+       
+    // add extras to free list
+       fptr = &p->mm.shadow_ht_free;
+       for ( i=0; i<shadow_ht_extra_size; i++ )
        {
-               gpfn =  gptbase >> PAGE_SHIFT;
-               
-               if ( likely(frame_table[gpfn].shadow_and_flags & PSH_shadowed) )
-               {
-                       spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+               *fptr = &p->mm.shadow_ht_extras[i];
+               fptr = &(p->mm.shadow_ht_extras[i].next);
+       }
+       *fptr = NULL;
+       *((struct shadow_status ** ) &p->mm.shadow_ht_extras[shadow_ht_extra_size]) = NULL;
+
+       spin_unlock(&p->mm.shadow_lock);
+
+    // call shadow_mk_pagetable
+       p->mm.shadow_table = shadow_mk_pagetable( p, 
+                                                                                         pagetable_val(p->mm.pagetable) );
+
+       return 0;
+
+nomem:
+       spin_unlock(&p->mm.shadow_lock);
+       return -ENOMEM;
+}
+
+void shadow_mode_disable( )
+{
+
+    // free the hash buckets as you go
+
+    // free the hashtable itself
+}
+
+
+static inline void free_shadow_page( struct task_struct *p, unsigned int pfn )
+{
+    unsigned long flags;
+
+       p->mm.shadow_page_count--;
+
+    spin_lock_irqsave(&free_list_lock, flags);
+    list_add(&frame_table[pfn].list, &free_list);
+    free_pfns++;
+    spin_unlock_irqrestore(&free_list_lock, flags);
+}
+
+static inline struct pfn_info *alloc_shadow_page( struct task_struct *p )
+{
+       p->mm.shadow_page_count++;
+
+       return alloc_domain_page( NULL );
+}
+
+
+static void __free_shadow_table( struct task_struct *p )
+{
+       int j;
+       struct shadow_status *a;
+       
+       // the code assumes you're not using the page tables i.e.
+    // the domain is stopped and cr3 is something else!!
+
+    // walk the hash table and call free_shadow_page on all pages
+
+    for(j=0;j<shadow_ht_buckets;j++)
+    {
+        a = &p->mm.shadow_ht[j];        
+        if (a->pfn)
+        {
+            free_shadow_page( p, a->spfn_and_flags & PSH_pfn_mask );
+            a->pfn = 0;
+            a->spfn_and_flags = 0;
+        }
+        a=a->next;
+        while(a)
+               { 
+            struct shadow_status *next = a->next;
+            free_shadow_page( p, a->spfn_and_flags & PSH_pfn_mask );
+            a->pfn = 0;
+            a->spfn_and_flags = 0;
+            a->next = p->mm.shadow_ht_free;
+            p->mm.shadow_ht_free = a;
+            a=next;
                }
-               else
-               {
-                       spfn = shadow_l2_table( gpfn );
-               }      
        }
+}
+
+static void flush_shadow_table( struct task_struct *p )
+{
+       
+    // XXX synchronously stop domain (needed for SMP guests)
+
+    // switch to idle task's page tables
+    // walk the hash table and call free_shadow_page on all pages
+       spin_lock(&p->mm.shadow_lock);
+       __free_shadow_table( p );
+       spin_unlock(&p->mm.shadow_lock);
 
-       return mk_pagetable(spfn << PAGE_SHIFT);
+    // XXX unpause domain
 }
 
+
+
 void unshadow_table( unsigned long gpfn, unsigned int type )
 {
        unsigned long spfn;
 
-    MEM_VLOG("unshadow_table type=%08x gpfn=%08lx, spfn=%08lx",
+    SH_VLOG("unshadow_table type=%08x gpfn=%08lx",
                 type,
-                gpfn,
-                frame_table[gpfn].shadow_and_flags & PSH_pfn_mask );
+                gpfn );
 
        perfc_incrc(unshadow_table_count);
 
@@ -79,9 +170,8 @@ void unshadow_table( unsigned long gpfn, unsigned int type )
        // even in the SMP guest case, there won't be a race here as
     // this CPU was the one that cmpxchg'ed the page to invalid
 
-       spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
-       frame_table[gpfn].shadow_and_flags=0;
-       frame_table[spfn].shadow_and_flags=0;
+       spfn = __shadow_status(current, gpfn) & PSH_pfn_mask;
+       delete_shadow_status(current, gpfn);
 
 #if 0 // XXX leave as might be useful for later debugging
        { 
@@ -101,27 +191,21 @@ void unshadow_table( unsigned long gpfn, unsigned int type )
     else
                perfc_decr(shadow_l2_pages);
 
-       //free_domain_page( &frame_table[spfn] );
-
-       {
-    unsigned long flags;
-    spin_lock_irqsave(&free_list_lock, flags);
-    list_add(&frame_table[spfn].list, &free_list);
-    free_pfns++;
-    spin_unlock_irqrestore(&free_list_lock, flags);
-       }
+       free_shadow_page( current, spfn );
 
 }
 
 
-unsigned long shadow_l2_table( unsigned long gpfn )
+static unsigned long shadow_l2_table( 
+                     struct task_struct *p, unsigned long gpfn )
 {
        struct pfn_info *spfn_info;
        unsigned long spfn;
        l2_pgentry_t *spl2e, *gpl2e;
        int i;
 
-       MEM_VVLOG("shadow_l2_table( %08lx )",gpfn);
+       SH_VVLOG("shadow_l2_table( %08lx )",gpfn);
+       spin_lock(&p->mm.shadow_lock);
 
        perfc_incrc(shadow_l2_table_count);
        perfc_incr(shadow_l2_pages);
@@ -129,17 +213,14 @@ unsigned long shadow_l2_table( unsigned long gpfn )
     // XXX in future, worry about racing in SMP guests 
     //      -- use cmpxchg with PSH_pending flag to show progress (and spin)
 
-       spfn_info = alloc_domain_page( NULL ); // XXX account properly later 
+       spfn_info = alloc_shadow_page(p);
 
     ASSERT( spfn_info ); // XXX deal with failure later e.g. blow cache
 
        spfn = (unsigned long) (spfn_info - frame_table);
 
        // mark pfn as being shadowed, update field to point at shadow
-       frame_table[gpfn].shadow_and_flags = spfn | PSH_shadowed;
-
-       // mark shadow pfn as being a shadow, update field to point at  pfn     
-       frame_table[spfn].shadow_and_flags = gpfn | PSH_shadow;
+       set_shadow_status(p, gpfn, spfn | PSH_shadowed);
        
        // we need to do this before the linear map is set up
        spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT);
@@ -172,11 +253,11 @@ unsigned long shadow_l2_table( unsigned long gpfn )
                if (gpte & _PAGE_PRESENT)
                {
                        unsigned long s_sh = 
-                               frame_table[ gpte>>PAGE_SHIFT ].shadow_and_flags;
+                               __shadow_status(p, gpte>>PAGE_SHIFT);
 
                        if( s_sh & PSH_shadowed ) // PSH_shadowed
                        {
-                               if ( unlikely( (frame_table[gpte>>PAGE_SHIFT].type_and_flags & PGT_type_mask) == PGT_l2_page_table) )
+                               if ( unlikely( (__shadow_status(p, gpte>>PAGE_SHIFT) & PGT_type_mask) == PGT_l2_page_table) )
                 {
                                        printk("Linear mapping detected\n");
                                    spte = gpte & ~_PAGE_RW;
@@ -203,33 +284,61 @@ unsigned long shadow_l2_table( unsigned long gpfn )
     unmap_domain_mem( gpl2e );
     unmap_domain_mem( spl2e );
 
-       MEM_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn);
-
+       SH_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn);
 
+       spin_unlock(&p->mm.shadow_lock);
        return spfn;
 }
 
+pagetable_t shadow_mk_pagetable( struct task_struct *p, 
+                                                                                          unsigned long gptbase)
+{
+       unsigned long gpfn, spfn=0;
+
+       SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
+                        gptbase, p->mm.shadow_mode );
+
+       if ( likely(p->mm.shadow_mode) )  // should always be true if we're here
+       {
+               gpfn =  gptbase >> PAGE_SHIFT;
+               
+               if ( unlikely((spfn=__shadow_status(p, gpfn)) == 0 ) )
+               {
+                       spfn = shadow_l2_table(p, gpfn );
+               }      
+       }
+
+       SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
+                        gptbase, p->mm.shadow_mode );
+
+       return mk_pagetable(spfn<<PAGE_SHIFT);
+}
 
 int shadow_fault( unsigned long va, long error_code )
 {
        unsigned long gpte, spte;
 
-       MEM_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
+       SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
 
-    check_pagetable( current->mm.pagetable, "pre-sf" );
+    spin_lock(&current->mm.shadow_lock);
+
+    check_pagetable( current, current->mm.pagetable, "pre-sf" );
 
        if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
        {
-               MEM_VVLOG("shadow_fault - EXIT: read gpte faulted" );
+               SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
+        spin_unlock(&current->mm.shadow_lock);
                return 0;  // propagate to guest
        }
 
        if ( ! (gpte & _PAGE_PRESENT) )
        {
-               MEM_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
+               SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
+        spin_unlock(&current->mm.shadow_lock);
                return 0;  // we're not going to be able to help
     }
 
+
     spte = gpte;
 
        if ( error_code & 2  )  
@@ -242,7 +351,8 @@ int shadow_fault( unsigned long va, long error_code )
                }
                else
                {   // write fault on RO page
-            MEM_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
+            SH_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
+            spin_unlock(&current->mm.shadow_lock);
                        return 0; // propagate to guest
                        // not clear whether we should set accessed bit here...
                }
@@ -255,7 +365,7 @@ int shadow_fault( unsigned long va, long error_code )
                        spte &= ~_PAGE_RW;  // force clear unless already dirty
        }
 
-       MEM_VVLOG("plan: gpte=%08lx  spte=%08lx", gpte, spte );
+       SH_VVLOG("plan: gpte=%08lx  spte=%08lx", gpte, spte );
 
        // write back updated gpte
     // XXX watch out for read-only L2 entries! (not used in Linux)
@@ -269,13 +379,13 @@ int shadow_fault( unsigned long va, long error_code )
 
                unsigned long gpde, spde, gl1pfn, sl1pfn;
 
-        MEM_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx  spte=%08lx",gpte,spte );
+        SH_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx  spte=%08lx",gpte,spte );
 
         gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]);
 
         gl1pfn = gpde>>PAGE_SHIFT;
 
-        if ( ! (frame_table[gl1pfn].shadow_and_flags & PSH_shadowed ) )
+        if ( ! (sl1pfn=__shadow_status(current, gl1pfn) ) )
         {
             // this L1 is NOT already shadowed so we need to shadow it
             struct pfn_info *sl1pfn_info;
@@ -284,12 +394,11 @@ int shadow_fault( unsigned long va, long error_code )
             sl1pfn_info = alloc_domain_page( NULL ); // XXX account properly! 
             sl1pfn = sl1pfn_info - frame_table;
 
-            MEM_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
+            SH_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
                perfc_incrc(shadow_l1_table_count);
                perfc_incr(shadow_l1_pages);
 
-            sl1pfn_info->shadow_and_flags = PSH_shadow | gl1pfn;
-            frame_table[gl1pfn].shadow_and_flags = PSH_shadowed | sl1pfn;
+            set_shadow_status(current, gl1pfn, PSH_shadowed | sl1pfn);
 
             gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
             spde = (gpde & ~PAGE_MASK) | _PAGE_RW | (sl1pfn<<PAGE_SHIFT);
@@ -330,9 +439,7 @@ int shadow_fault( unsigned long va, long error_code )
             // this L1 was shadowed (by another PT) but we didn't have an L2
             // entry for it
 
-            sl1pfn = frame_table[gl1pfn].shadow_and_flags & PSH_pfn_mask;
-
-            MEM_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
+            SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
 
                    spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
 
@@ -341,7 +448,7 @@ int shadow_fault( unsigned long va, long error_code )
 
                        if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gl1pfn<<PAGE_SHIFT)  ) )
                        {   // detect linear map, and keep pointing at guest
-                MEM_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
+                SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
                                spde = (spde & ~PAGE_MASK) | (gl1pfn<<PAGE_SHIFT);
                        }
 
@@ -358,7 +465,9 @@ int shadow_fault( unsigned long va, long error_code )
        
     perfc_incrc(shadow_fixup_count);
 
-    check_pagetable( current->mm.pagetable, "post-sf" );
+    check_pagetable( current, current->mm.pagetable, "post-sf" );
+
+    spin_unlock(&current->mm.shadow_lock);
 
     return 1; // let's try the faulting instruction again...
 
@@ -373,13 +482,13 @@ void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
     l1_pgentry_t * spl1e, * prev_spl1e = *prev_spl1e_ptr;
 
 
-MEM_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%08lx\n",
+SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%08lx\n",
 pa,gpte,prev_spfn, prev_spl1e);
 
     // to get here, we know the l1 page *must* be shadowed
 
     gpfn = pa >> PAGE_SHIFT;
-    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+    spfn = __shadow_status(current, gpfn) & PSH_pfn_mask;
 
     if ( spfn == prev_spfn )
     {
@@ -417,21 +526,23 @@ void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte )
 {
     unsigned long gpfn, spfn, spte;
     l2_pgentry_t * sp2le;
-    unsigned long s_sh;
+    unsigned long s_sh=0;
 
-    MEM_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte);
+    SH_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte);
 
     // to get here, we know the l2 page has a shadow
 
     gpfn = pa >> PAGE_SHIFT;
-    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+    spfn = __shadow_status(current, gpfn) & PSH_pfn_mask;
 
-    sp2le = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
-    // no real need for a cache here
 
     spte = 0;
 
-    s_sh = frame_table[gpte >> PAGE_SHIFT].shadow_and_flags;
+       if( gpte & _PAGE_PRESENT )
+               s_sh = __shadow_status(current, gpte >> PAGE_SHIFT);
+
+    sp2le = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
+    // no real need for a cache here
                
        if ( s_sh ) // PSH_shadowed
        {
@@ -463,7 +574,8 @@ char * sh_check_name;
 #define FAIL(_f, _a...)                             \
 {printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n",  sh_check_name, level, i, ## _a , gpte, spte ); BUG();}
 
-int check_pte( unsigned long gpte, unsigned long spte, int level, int i )
+static int check_pte( struct task_struct *p, 
+                          unsigned long gpte, unsigned long spte, int level, int i )
 {
        unsigned long mask, gpfn, spfn;
 
@@ -504,42 +616,24 @@ int check_pte( unsigned long gpte, unsigned long spte, int level, int i )
                if ( level > 1 )
                        FAIL("Linear map ???");                  // XXX this will fail on BSD
 
-#if 0 // might be a RO mapping of a page table page
-               if ( frame_table[gpfn].shadow_and_flags != 0 )
-        {
-                       FAIL("Should have been shadowed g.sf=%08lx s.sf=%08lx", 
-                                frame_table[gpfn].shadow_and_flags,
-                                frame_table[spfn].shadow_and_flags);
-        }
-               else
-#endif
-                       return 1;
+               return 1;
        }
        else
        {
                if ( level < 2 )
                        FAIL("Shadow in L1 entry?");
 
-               if ( frame_table[gpfn].shadow_and_flags != (PSH_shadowed | spfn) )
-                       FAIL("spfn problem g.sf=%08lx s.sf=%08lx [g.sf]=%08lx [s.sf]=%08lx", 
-                                frame_table[gpfn].shadow_and_flags,
-                                frame_table[spfn].shadow_and_flags,
-                                frame_table[frame_table[gpfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags,
-                                frame_table[frame_table[spfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags
-                                );
-
-               if ( frame_table[spfn].shadow_and_flags != (PSH_shadow | gpfn) )
-                       FAIL("gpfn problem g.sf=%08lx s.sf=%08lx", 
-                                frame_table[gpfn].shadow_and_flags,
-                                frame_table[spfn].shadow_and_flags);
-
+               if ( __shadow_status(p, gpfn) != (PSH_shadowed | spfn) )
+                       FAIL("spfn problem g.sf=%08lx", 
+                                __shadow_status(p, gpfn) );
        }
 
        return 1;
 }
 
 
-int check_l1_table( unsigned long va, unsigned long g2, unsigned long s2 )
+static int check_l1_table( struct task_struct *p, unsigned long va, 
+                                       unsigned long g2, unsigned long s2 )
 {
        int j;
        unsigned long *gpl1e, *spl1e;
@@ -555,7 +649,7 @@ int check_l1_table( unsigned long va, unsigned long g2, unsigned long s2 )
                unsigned long gpte = gpl1e[j];
                unsigned long spte = spl1e[j];
                
-               check_pte( gpte, spte, 1, j );
+               check_pte( p, gpte, spte, 1, j );
        }
        
        unmap_domain_mem( spl1e );
@@ -567,7 +661,7 @@ int check_l1_table( unsigned long va, unsigned long g2, unsigned long s2 )
 #define FAILPT(_f, _a...)                             \
 {printk("XXX FAIL %s-PT" _f "\n", s, ## _a ); BUG();}
 
-int check_pagetable( pagetable_t pt, char *s )
+int check_pagetable( struct task_struct *p, pagetable_t pt, char *s )
 {
        unsigned long gptbase = pagetable_val(pt);
        unsigned long gpfn, spfn;
@@ -576,29 +670,26 @@ int check_pagetable( pagetable_t pt, char *s )
 
        sh_check_name = s;
 
-    MEM_VVLOG("%s-PT Audit",s);
+    SH_VVLOG("%s-PT Audit",s);
 
        sh_l2_present = sh_l1_present = 0;
 
        gpfn =  gptbase >> PAGE_SHIFT;
 
-       if ( ! (frame_table[gpfn].shadow_and_flags & PSH_shadowed) )
+       if ( ! (__shadow_status(p, gpfn) & PSH_shadowed) )
        {
                printk("%s-PT %08lx not shadowed\n", s, gptbase);
 
-               if( frame_table[gpfn].shadow_and_flags != 0 ) BUG();
+               if( __shadow_status(p, gpfn) != 0 ) BUG();
 
                return 0;
        }
        
-    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+    spfn = __shadow_status(p, gpfn) & PSH_pfn_mask;
 
-       if ( ! frame_table[gpfn].shadow_and_flags == (PSH_shadowed | spfn) )
+       if ( ! __shadow_status(p, gpfn) == (PSH_shadowed | spfn) )
                FAILPT("ptbase shadow inconsistent1");
 
-       if ( ! frame_table[spfn].shadow_and_flags == (PSH_shadow | gpfn) )
-               FAILPT("ptbase shadow inconsistent2");
-
        gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT );
        spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
 
@@ -641,7 +732,7 @@ int check_pagetable( pagetable_t pt, char *s )
                unsigned long gpte = l2_pgentry_val(gpl2e[i]);
                unsigned long spte = l2_pgentry_val(spl2e[i]);
 
-               check_pte( gpte, spte, 2, i );
+               check_pte( p, gpte, spte, 2, i );
        }
 
 
@@ -652,7 +743,7 @@ int check_pagetable( pagetable_t pt, char *s )
                unsigned long spte = l2_pgentry_val(spl2e[i]);
 
                if ( spte )        
-                       check_l1_table( 
+                       check_l1_table( p,
                                i<<L2_PAGETABLE_SHIFT,
                                gpte>>PAGE_SHIFT, spte>>PAGE_SHIFT );
 
@@ -661,7 +752,7 @@ int check_pagetable( pagetable_t pt, char *s )
        unmap_domain_mem( spl2e );
        unmap_domain_mem( gpl2e );
 
-       MEM_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
+       SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
                   sh_l2_present, sh_l1_present );
        
        return 1;
@@ -671,7 +762,6 @@ int check_pagetable( pagetable_t pt, char *s )
 #endif
 
 
-#endif // CONFIG_SHADOW
 
 
 
index 0496f481d9e15ed32f43d2d346a17da629075b35..e5b380618f8d199ed1375c381518a8e505125eda 100644 (file)
@@ -40,8 +40,6 @@
 
 #define CONFIG_XEN_ATTENTION_KEY 1
 
-#define CONFIG_SHADOW 1
-
 
 #define HZ 100
 
index 9766ac7b209be81d21fcd80860f4559dca801e71..7cf48541efd52095ddb6954c7d967fce795a1fe0 100644 (file)
@@ -12,6 +12,7 @@
 #include <asm/cpufeature.h>
 #include <asm/desc.h>
 #include <xeno/config.h>
+#include <xeno/spinlock.h>
 #include <hypervisor-ifs/hypervisor-if.h>
 
 struct task_struct;
@@ -416,10 +417,14 @@ struct mm_struct {
     l1_pgentry_t *perdomain_pt;
     pagetable_t  pagetable;
 
-#ifdef CONFIG_SHADOW
-    unsigned int shadowmode;  /* flags to control shadow table operation */
-    pagetable_t  shadowtable;
-#endif
+    unsigned int shadow_mode;  /* flags to control shadow table operation */
+    pagetable_t  shadow_table;
+    spinlock_t shadow_lock;
+    struct shadow_status *shadow_ht;
+    struct shadow_status *shadow_ht_free;
+    struct shadow_status *shadow_ht_extras; // extra allocation units
+    unsigned int shadow_page_count;
+    unsigned int shadow_max_page_count;
 
     /* Current LDT details. */
     unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
index c1df341a283c21916730b116f069d1d0dade4971..68eca9807eeb49ec10205d2eb312237d899fd209 100644 (file)
@@ -67,10 +67,6 @@ struct pfn_info
     unsigned long       type_and_flags;
     /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
     unsigned long       tlbflush_timestamp;
-#ifdef CONFIG_SHADOW
-    /* Shadow page status: top bits flags, bottom bits are a pfn */
-    unsigned long       shadow_and_flags;  
-#endif
 };
 
  /* The following page types are MUTUALLY EXCLUSIVE. */
index 7034081b48f09035bb5e321e374c32ef224da442..212a0dbfbb7abab26e5d93091a0844138dd1a339 100644 (file)
@@ -3,15 +3,13 @@
 #ifndef _XENO_SHADOW_H
 #define _XENO_SHADOW_H
 
-#ifdef CONFIG_SHADOW
-
 #include <xeno/config.h>
 #include <xeno/types.h>
 #include <xeno/mm.h>
+#include <xeno/perfc.h>
 
 /* Shadow PT flag bits in pfn_info */
 #define PSH_shadowed   (1<<31) /* page has a shadow. PFN points to shadow */
-#define PSH_shadow         (1<<30) /* page is a shadow. PFN points to orig page */
 #define PSH_pending        (1<<29) /* page is in the process of being shadowed */
 #define PSH_pfn_mask   ((1<<21)-1)
 
 #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
 #define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
 
-extern pagetable_t shadow_mk_pagetable( unsigned long gptbase, unsigned int shadowmode );
-extern void unshadow_table( unsigned long gpfn, unsigned int type );
-extern unsigned long shadow_l2_table( unsigned long gpfn );
+extern pagetable_t shadow_mk_pagetable( struct task_struct *p, 
+                                                                               unsigned long gptbase);
 extern int shadow_fault( unsigned long va, long error_code );
 extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte, 
                                                                                unsigned long *prev_spfn_ptr,
                                                                                l1_pgentry_t **prev_spl1e_ptr  );
 extern void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte );
-
+extern void unshadow_table( unsigned long gpfn, unsigned int type );
+extern int shadow_mode_enable( struct task_struct *p, unsigned int mode );
 
 #define SHADOW_DEBUG 0
+#define SHADOW_HASH_DEBUG 0
 #define SHADOW_OPTIMISE 1
 
-#endif // end of CONFIG_SHADOW
+struct shadow_status {
+    unsigned long pfn;            // gpfn 
+    unsigned long spfn_and_flags; // spfn plus flags
+    struct shadow_status *next;   // use pull-to-front list.
+};
+
+#define shadow_ht_extra_size         128 /*128*/
+#define shadow_ht_buckets            256 /*256*/
+
+#ifndef NDEBUG
+#define SH_LOG(_f, _a...)                             \
+  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+         current->domain , __LINE__ , ## _a )
+#else
+#define SH_LOG(_f, _a...) 
+#endif
 
 #if SHADOW_DEBUG
-extern int check_pagetable( pagetable_t pt, char *s );
+#define SH_VLOG(_f, _a...)                             \
+  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+         current->domain , __LINE__ , ## _a )
+#else
+#define SH_VLOG(_f, _a...) 
+#endif
+
+#if 0
+#define SH_VVLOG(_f, _a...)                             \
+  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+         current->domain , __LINE__ , ## _a )
+#else
+#define SH_VVLOG(_f, _a...) 
+#endif
+
+
+
+#if SHADOW_HASH_DEBUG
+static void shadow_audit(struct task_struct *p, int print)
+{
+       int live=0, free=0, j=0, abs;
+       struct shadow_status *a;
+       
+    for(j=0;j<shadow_ht_buckets;j++)
+    {
+        a = &p->mm.shadow_ht[j];        
+               if(a->pfn) live++;
+        while(a->next && live<9999)
+               { 
+                       live++; 
+                       if(a->pfn == 0)
+                       {
+                               printk("XXX live=%d pfn=%08lx sp=%08lx next=%p\n",
+                                          live, a->pfn, a->spfn_and_flags, a->next);
+                               BUG();
+                       }
+                       a=a->next; 
+               }
+               ASSERT(live<9999);
+       }
+
+    a = p->mm.shadow_ht_free;
+    while(a) { free++; a=a->next; }
+
+    if(print) printk("live=%d free=%d\n",live,free);
+
+       abs=(perfc_value(shadow_l1_pages)+perfc_value(shadow_l2_pages))-live;
+       if( abs < -1 || abs > 1 )
+       {
+               printk("live=%d free=%d l1=%d l2=%d\n",live,free,
+                         perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) );
+               BUG();
+    }
+
+}
+
 #else
-#define check_pagetable( pt, s )
+#define shadow_audit(p, print)
+#endif
+
+static inline struct shadow_status* hash_bucket( struct task_struct *p,
+                                                                                                unsigned int gpfn )
+{
+    return &(p->mm.shadow_ht[gpfn % shadow_ht_buckets]);
+}
+
+
+static inline unsigned long __shadow_status( struct task_struct *p,
+                                                                                  unsigned int gpfn )
+{
+       struct shadow_status **ob, *b, *B = hash_bucket( p, gpfn );
+
+    b = B;
+    ob = NULL;
+
+       SH_VVLOG("lookup gpfn=%08lx bucket=%08lx", gpfn, b );
+       shadow_audit(p,0);  // if in debug mode
+
+       do
+       {
+               if ( b->pfn == gpfn )
+               {
+                       unsigned long t;
+                       struct shadow_status *x;
+
+                       // swap with head
+                       t=B->pfn; B->pfn=b->pfn; b->pfn=t;
+                       t=B->spfn_and_flags; B->spfn_and_flags=b->spfn_and_flags; 
+                           b->spfn_and_flags=t;
+
+                       if(ob)
+                       {   // pull to front
+                               *ob=b->next;
+                               x=B->next;
+                               B->next=b;      
+                               b->next=x;
+                       }                       
+                       return B->spfn_and_flags;
+               }
+#if SHADOW_HASH_DEBUG
+               else
+               {
+                       if(b!=B)ASSERT(b->pfn);
+               }
 #endif
+               ob=&b->next;
+               b=b->next;
+       }
+       while (b);
+
+       return 0;
+}
+
+/* we can make this locking more fine grained e.g. per shadow page if it 
+ever becomes a problem, but since we need a spin lock on the hash table 
+anyway its probably not worth being too clever. */
+
+static inline unsigned long get_shadow_status( struct task_struct *p,
+                                                                                  unsigned int gpfn )
+{
+       unsigned long res;
+
+       spin_lock(&p->mm.shadow_lock);
+       res = __shadow_status( p, gpfn );
+       if (!res) spin_unlock(&p->mm.shadow_lock);
+       return res;
+}
+
+
+static inline void put_shadow_status( struct task_struct *p )
+{
+       spin_unlock(&p->mm.shadow_lock);
+}
 
 
+static inline void delete_shadow_status( struct task_struct *p,
+                                                                         unsigned int gpfn )
+{
+       struct shadow_status *b, *B, **ob;
+
+       B = b = hash_bucket( p, gpfn );
+
+       SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b );
+       shadow_audit(p,0);
+       ASSERT(gpfn);
+
+       if( b->pfn == gpfn )
+    {
+               if (b->next)
+               {
+                       struct shadow_status *D=b->next;
+                       b->spfn_and_flags = b->next->spfn_and_flags;
+                       b->pfn = b->next->pfn;
+
+                       b->next = b->next->next;
+                       D->next = p->mm.shadow_ht_free;
+                       p->mm.shadow_ht_free = D;
+               }
+               else
+               {
+                       b->pfn = 0;
+                       b->spfn_and_flags = 0;
+               }
+
+#if SHADOW_HASH_DEBUG
+               if( __shadow_status(p,gpfn) ) BUG();  
+#endif
+               return;
+    }
+
+       ob = &b->next;
+       b=b->next;
+
+       do
+       {
+               if ( b->pfn == gpfn )                   
+               {
+                       b->pfn = 0;
+                       b->spfn_and_flags = 0;
+
+                       // b is in the list
+            *ob=b->next;
+                       b->next = p->mm.shadow_ht_free;
+                       p->mm.shadow_ht_free = b;
+
+#if SHADOW_HASH_DEBUG
+                       if( __shadow_status(p,gpfn) ) BUG();
+#endif
+                       return;
+               }
+
+               ob = &b->next;
+               b=b->next;
+       }
+       while (b);
+
+       // if we got here, it wasn't in the list
+    BUG();
+}
+
+
+static inline void set_shadow_status( struct task_struct *p,
+                                                                         unsigned int gpfn, unsigned long s )
+{
+       struct shadow_status *b, *B, *extra, **fptr;
+    int i;
+
+       B = b = hash_bucket( p, gpfn );
+   
+    ASSERT(gpfn);
+    ASSERT(s);
+    SH_VVLOG("set gpfn=%08x s=%08lx bucket=%p(%p)", gpfn, s, b, b->next );
+    shadow_audit(p,0);
+
+       do
+       {
+               if ( b->pfn == gpfn )                   
+               {
+                       b->spfn_and_flags = s;
+                       return;
+               }
+
+               b=b->next;
+       }
+       while (b);
+
+       // if we got here, this is an insert rather than update
+
+    ASSERT( s );  // deletes must have succeeded by here
+
+    if ( B->pfn == 0 )
+       {
+               // we can use this head
+        ASSERT( B->next == 0 );
+               B->pfn = gpfn;
+               B->spfn_and_flags = s;
+               return;
+       }
+
+    if( unlikely(p->mm.shadow_ht_free == NULL) )
+    {
+        SH_LOG("allocate more shadow hashtable blocks");
+
+        // we need to allocate more space
+        extra = kmalloc( sizeof(void*) + (shadow_ht_extra_size * 
+                                                          sizeof(struct shadow_status)), GFP_KERNEL );
+
+           if( ! extra ) BUG(); // should be more graceful here....
+
+           memset( extra, 0, sizeof(void*) + (shadow_ht_extra_size * 
+                                                          sizeof(struct shadow_status)) );
+       
+        // add extras to free list
+           fptr = &p->mm.shadow_ht_free;
+           for ( i=0; i<shadow_ht_extra_size; i++ )
+           {
+                   *fptr = &extra[i];
+                   fptr = &(extra[i].next);
+           }
+           *fptr = NULL;
+
+           *((struct shadow_status ** ) &p->mm.shadow_ht[shadow_ht_extra_size]) = 
+                                            p->mm.shadow_ht_extras;
+        p->mm.shadow_ht_extras = extra;
+
+    }
+
+       // should really put this in B to go right to front
+       b = p->mm.shadow_ht_free;
+    p->mm.shadow_ht_free = b->next;
+    b->spfn_and_flags = s;
+       b->pfn = gpfn;
+       b->next = B->next;
+       B->next = b;
+
+       return;
+}
+
+
+
+#if SHADOW_DEBUG
+extern int check_pagetable( struct task_struct *p, pagetable_t pt, char *s );
+#else
+#define check_pagetable( p, pt, s )
+#endif
 
 
 #endif
index bb25e6a2b9b4e84098edc6b426ce40897a06bf87..5d692f51f4db9f5321936e740410a73fb90ea51e 100644 (file)
@@ -494,6 +494,7 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
     unsigned short size;
     unsigned char  offset, status = RING_STATUS_OK;
     struct task_struct *p = vif->domain;
+    unsigned long spte_pfn;
 
     memcpy(skb->mac.ethernet->h_dest, vif->vmac, ETH_ALEN);
     if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
@@ -546,21 +547,18 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
         goto out;
     }
 
-
-#ifdef CONFIG_SHADOW
-    if ( pte_page->shadow_and_flags & PSH_shadowed )
+    if ( p->mm.shadow_mode && 
+        (spte_pfn=get_shadow_status(p, pte_page-frame_table)) )
     {
-        unsigned long spte_pfn = pte_page->shadow_and_flags & PSH_pfn_mask;
        unsigned long *sptr = map_domain_mem( (spte_pfn<<PAGE_SHIFT) |
                        (((unsigned long)ptep)&~PAGE_MASK) );
 
-        // save the fault later
+        // avoid the fault later
        *sptr = new_pte;
 
-       unmap_domain_mem( sptr );
+       unmap_domain_mem(sptr);
+       put_shadow_status(p);
     }
-#endif
-
 
     machine_to_phys_mapping[new_page - frame_table] 
         = machine_to_phys_mapping[old_page - frame_table];
@@ -2068,7 +2066,7 @@ static void get_rx_bufs(net_vif_t *vif)
     rx_shadow_entry_t *srx;
     unsigned long  pte_pfn, buf_pfn;
     struct pfn_info *pte_page, *buf_page;
-    unsigned long *ptep, pte;
+    unsigned long *ptep, pte, spfn;
 
     spin_lock(&vif->rx_lock);
 
@@ -2114,21 +2112,16 @@ static void get_rx_bufs(net_vif_t *vif)
             goto rx_unmap_and_continue;
         }
 
-#ifdef CONFIG_SHADOW
-       {
-           if ( frame_table[rx.addr>>PAGE_SHIFT].shadow_and_flags & PSH_shadowed )
-             {
-               unsigned long spfn = 
-                 frame_table[rx.addr>>PAGE_SHIFT].shadow_and_flags & PSH_pfn_mask;
-               unsigned long * sptr = map_domain_mem( (spfn<<PAGE_SHIFT) | (rx.addr&~PAGE_MASK) );
+       if ( p->mm.shadow_mode && 
+            (spfn=get_shadow_status(p, rx.addr>>PAGE_SHIFT)) )
+         {
+           unsigned long * sptr = 
+             map_domain_mem( (spfn<<PAGE_SHIFT) | (rx.addr&~PAGE_MASK) );
 
-               *sptr = 0;
-               unmap_domain_mem( sptr );
-
-             }
-
-       }
-#endif
+           *sptr = 0;
+           unmap_domain_mem( sptr );
+           put_shadow_status(p);
+         }
         
         buf_pfn  = pte >> PAGE_SHIFT;
         buf_page = &frame_table[buf_pfn];